# Load the source tables from CSV files hosted on S3. Every read.csv() call
# below downloads its file, so running the script needs network access.
# `income`, `life` and `population` are keyed by a `geo` column and are
# reshaped to long format further down in the script.
income <- read.csv("https://projectdat.s3.amazonaws.com/income_per_person.csv")
life <- read.csv("https://projectdat.s3.amazonaws.com/life_expectancy_years.csv")
population <- read.csv("https://projectdat.s3.amazonaws.com/population_total.csv")
# Country lookup table; it is joined later through its `name` column.
country <- read.csv("https://projectdat.s3.amazonaws.com/countries_total.csv")
library(dplyr)
library(tidyr)     # gather()
library(ggplot2)   # ggplot(), geom_point(), scales, labs()
library(gganimate) # transition_time(), ease_aes(), animate(), anim_save()
library(plotly)    # plot_ly(), plot_geo(), add_markers(), layout(), toRGB()
# Income per person: turn the wide table (a `geo` column plus the year
# columns) into long format with three columns: country, Year and Income.
# Rows whose income value is missing are dropped (na.rm = TRUE).
Income.Per.Person <- gather(
  income,
  key = "Year", value = "Income",
  -geo,
  na.rm = TRUE
)
# `geo` is the only column that was not gathered, so it comes first.
colnames(Income.Per.Person)[1L] <- "country"
head(Income.Per.Person)
## country Year Income
## 1 Afghanistan X1800 603
## 2 Albania X1800 667
## 3 Algeria X1800 715
## 4 Andorra X1800 1200
## 5 Angola X1800 618
## 6 Antigua and Barbuda X1800 757
# Life expectancy: turn the wide table (a `geo` column plus the year columns)
# into long format with three columns: country, Year and Life.Expectancy.
# Rows whose life-expectancy value is missing are dropped (na.rm = TRUE).
Life.Expectancy.in.Years <- gather(
  life,
  key = "Year", value = "Life.Expectancy",
  -geo,
  na.rm = TRUE
)
# `geo` is the only column that was not gathered, so it comes first.
colnames(Life.Expectancy.in.Years)[1L] <- "country"
head(Life.Expectancy.in.Years)
## country Year Life.Expectancy
## 1 Afghanistan X1800 28.2
## 2 Albania X1800 35.4
## 3 Algeria X1800 28.8
## 5 Angola X1800 27.0
## 6 Antigua and Barbuda X1800 33.5
## 7 Argentina X1800 33.2
# Combine the two long tables into LifeExpIncom. Only (country, Year) pairs
# present in both inputs are kept; the result has the columns country, Year,
# Income and Life.Expectancy.
LifeExpIncom <- merge(
  x = Income.Per.Person,
  y = Life.Expectancy.in.Years,
  by = c("country", "Year")
)
# Attach the country lookup table (which supplies the region information).
# all.x = TRUE keeps every row of LifeExpIncom even when the lookup table has
# no entry with a matching `name`.
LifeExpIncomFinal <- merge(
  x = LifeExpIncom,
  y = country,
  by.x = "country",
  by.y = "name",
  all.x = TRUE
)
# Population: turn the wide table (a `geo` column plus the year columns) into
# long format with three columns: country, Year and Population, dropping rows
# whose population value is missing. The result is merged into the combined
# data set further down.
Pop <- gather(
  population,
  key = "Year", value = "Population",
  -geo,
  na.rm = TRUE
)
# `geo` is the only column that was not gathered, so it comes first.
colnames(Pop)[1L] <- "country"
head(Pop)
## country Year Population
## 1 Afghanistan X1800 3280000
## 2 Albania X1800 410000
## 3 Algeria X1800 2500000
## 4 Andorra X1800 2650
## 5 Angola X1800 1570000
## 6 Antigua and Barbuda X1800 37000
# Final data set: income, life expectancy and country information joined with
# population. Only (country, Year) pairs present in both inputs are kept.
LEIP <- merge(
  x = LifeExpIncomFinal,
  y = Pop,
  by = c("country", "Year")
)
# Interactive scatter plot of life expectancy against income for 2015.
#  - marker size is scaled by population size
#  - every country gets its own colour
#  - markers are semi-transparent (alpha = 0.6) with a royal-blue outline so
#    that overlapping points stay distinguishable
#  - the hover text shows the country name and its population in millions
LEIP.2015 <- LEIP %>%
  filter(Year == "X2015") %>%
  mutate(pmm = Population / 1000000) %>% # population in millions, for hover
  select(country, Year, Life.Expectancy, Income, Population, pmm)

plot_ly(
  data = LEIP.2015,
  x = ~Income,
  y = ~Life.Expectancy,
  color = ~country,
  stroke = I("royalblue"),
  hoverinfo = "text",
  # Formula form: evaluated against `data`, so it always stays aligned with
  # the plotted rows.
  hovertext = ~paste("Country: ", country,
                     "<br> Population: ", pmm, "Million"),
  alpha = 0.6,
  size = ~Population,
  sizes = c(10, 5000),
  type = "scatter",
  mode = "markers"
) %>%
  layout(
    # `titlefont` is deprecated; the font now lives inside the title list.
    title = list(
      text = "Association Between Life Expectancy & Income in 2015",
      font = list(
        family = "Helvetica",
        size = 25,
        color = 'Royalblue'
      )
    ),
    # `margin` must be a list of sides; a bare number is not a valid value.
    # The top margin is larger so the 25pt title is not clipped.
    margin = list(l = 10, r = 10, b = 10, t = 60),
    plot_bgcolor = "#e5ecf6",
    xaxis = list(title = 'Income (US$)'),
    yaxis = list(title = 'Life Expectancy (Yr)'),
    showlegend = FALSE
  )
# Animated scatter plot showing how the relationship between life expectancy
# and income changes over the years.
#  - point size is scaled by population size
#  - fill colour is mapped to region
#  - points are semi-transparent (alpha = 0.3) with a black outline
#    (pch = 21) so that overlapping points stay distinguishable
#  - income is drawn on a log10 axis

# Year labels look like "X1800": strip the leading character. The value is
# stored as integer so that the "{frame_time}" title shows whole years instead
# of fractional values on interpolated frames.
LEIP2 <- mutate(LEIP, year.num = as.integer(substring(Year, 2)))

p <- ggplot(LEIP2, aes(x = Income,
                       y = Life.Expectancy,
                       fill = region)) +
  geom_point(aes(size = Population),
             color = 'black',
             pch = 21,
             show.legend = FALSE,
             alpha = 0.3) +
  scale_size_continuous(range = c(2, 25)) +
  scale_x_log10() +
  labs(title = "Year: {frame_time}",
       x = "Income (US$)",
       y = "Life Expectancy (Yr)") +
  transition_time(year.num) +
  ease_aes('linear')

# Render once, then save and display that same rendering. Passing the
# un-rendered plot to anim_save() would render it a second time with default
# settings, so the saved GIF would not include the rewind.
LifeExp.anim <- animate(p, renderer = gifski_renderer(),
                        rewind = TRUE)
anim_save("LifeExp.gif", animation = LifeExp.anim)
LifeExp.anim

# Interactive map of a random sample of US gas stations.
#  - up to 500 stations are drawn at random from the data set
#  - hovering a marker shows state, county, address and zip code
GS <- read.csv("https://projectdat.s3.amazonaws.com/POC.csv")
# Cap the sample size at the number of rows so sample() cannot fail with
# "cannot take a sample larger than the population".
GS.500 <- GS[sample(nrow(GS), min(500L, nrow(GS))), ]

# Geo styling: US-only map on the Albers USA projection with light grey land
# and thin grey state/country borders.
g <- list(
  scope = 'usa',
  projection = list(type = 'albers usa'),
  showland = TRUE,
  landcolor = toRGB("gray95"),
  subunitcolor = toRGB("gray85"),
  countrycolor = toRGB("gray85"),
  countrywidth = 0.5,
  subunitwidth = 0.5
)

fig <- plot_geo(GS.500, lat = ~ycoord, lon = ~xcoord) %>%
  add_markers(
    text = ~paste(STATE, county, ADDRESS, ZIPnew, sep = "<br>"),
    # I() marks these as literal visual values. A bare string is treated as a
    # data value to be mapped, so it would not be drawn in orange.
    color = I("orange"),
    symbol = I("circle"),
    hoverinfo = "text"
  ) %>%
  layout(
    title = '500 Randomly Selected Gas Stations in the U.S.',
    geo = g
  )
fig